Data Source

文章標題分析

文章種類 Top 20

# Tag every post with the bracketed label at the start of its title
# (e.g. "[...]"); titles that do not start with such a label get NA.
ptt_dt[, category := stringr::str_extract(title, "^\\[([^]]+?)\\]")]

# Count posts per category and show the 20 most frequent categories.
head(ptt_dt[!is.na(category), .N, by = .(category)][order(-N)], 20)

PTT Keelung 發文種類分佈

# The 20 most frequent categories, ordered by descending post count.
top20 <- ptt_dt[!is.na(category), .N, by = .(category)][order(-N)] %>% head(20)

# Posts in those categories. `category` becomes a factor whose level order
# follows the ranking in `top20`, so the facets appear from most to least
# frequent.
# NOTE(review): if post_time is POSIXct, as.Date() converts using UTC by
# default -- confirm that a possible one-day shift is acceptable here.
d <- ptt_dt[category %in% top20$category, .(category, post_time)][
  , category := factor(category, levels = top20$category)][
    , post_date := as.Date(post_time)]

# One density panel per category, laid out five panels per row.
ggplot(d, aes(x = post_date, color = category, fill = category)) +
  stat_density(geom = "area", alpha = 0.2) +
  # stat_bin(binwidth = 30, geom = "area", alpha = 0.2) +
  # scale_color_brewer(palette = "Set2") +
  # scale_fill_brewer(palette = "Set2") +
  ggtitle("PTT Keelung 發文種類分佈") +
  # `fill = FALSE` is deprecated in guides(); "none" is the supported spelling.
  guides(color = guide_legend(title = NULL, label.position = "top",
                              nrow = 2, byrow = TRUE),
         fill = "none") +
  facet_wrap(~ category, ncol = 5) +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(family = "STHeiti", size = 14))

Text Mining

  1. 排除 [交易], [徵求], [贈送], [公告], [協尋], [自介] 類別文章
  2. 利用 tf-idf 關鍵詞算法，處理高頻詞高估及低頻詞低估的問題，取得整個文檔的關鍵詞。
# Sum the tf-idf weight of every term kept by find_freq_terms(3) and turn the
# named vector into a two-column table: keyword, summed tf-idf.
key_term <- data.table(
  data.frame(colSums(find_freq_terms(dtm_train_tfidf, 3))),
  keep.rownames = TRUE
)
setnames(key_term, c("keyword", "sum_tf_idf"))

# Highest-scoring keywords first.
key_term <- key_term[order(-sum_tf_idf)]

# Interactive table of the top 100 keywords.
DT::datatable(head(key_term, 100))
# Word cloud of the first 200 rows of `key_term`.
d <- head(key_term, 200)

# One colour per word, interpolated from the 8-colour "Set2" Brewer palette.
ncolor <- nrow(d)
getPalette <- colorRampPalette(RColorBrewer::brewer.pal(8, "Set2"))

wordcloud2(
  d,
  size = 0.5,
  fontFamily = "Noto Sans CJK TC",
  fontWeight = "normal",
  rotateRatio = 0,
  color = getPalette(ncolor),
  shape = "circle"
)

Topic Models

# Preprocessing ------------------------------------------------

# Segmented posts used as the corpus for the topic model.
doc.list <- post_text_seg

## tf-idf
# Define the tf-idf model, then fit it to the DTM and transform the DTM in one
# step. Note that fit_transform() also modifies `tfidf` itself.
tfidf <- TfIdf$new()
dtm_train_tfidf <- fit_transform(dtm, tfidf)

# Summed tf-idf weight of every term kept by find_freq_terms(lowfreq = 5).
# Computed once and reused for both the threshold and the filter.
l1_scores <- dtm_train_tfidf %>%
  find_freq_terms(lowfreq = 5) %>%
  colSums()

# Threshold: the median of the summed weights.
l1 <- median(l1_scores)

# Keep only the terms scoring above the median.
# The earlier form `(... %>% colSums() > l1) %>% names` took names() of the
# whole logical vector, so it returned every term and the threshold had no
# effect. which() also drops any NA comparison results.
l1_terms <- names(l1_scores)[which(l1_scores > l1)]

# compute the table of terms:
# term.table <- dtm %>% slam::col_sums()
# term.table <- sort(term.table, decreasing = TRUE)
# Named vector: term -> corpus count, taken from the vocabulary object.
term.table <- setNames(vocab$vocab$terms_counts, vocab$vocab$terms)

# remove terms that are stop words or occur fewer than 5 times:
# del <- term.table < 5
# term.table <- term.table[!del]
# vocab <- names(term.table)

#' Encode one tokenised document as a 2-row integer matrix
#'
#' Tokens that are not found in `vocab` are dropped.
#'
#' @param doc.list Character vector of tokens for a single document.
#' @param vocab Character vector of vocabulary terms.
#' @return An integer matrix with two rows and one column per retained token:
#'   row 1 holds the zero-based position of the token in `vocab`, row 2 holds
#'   the constant count 1.
get_terms <- function(doc.list, vocab) {
  hits <- match(doc.list, vocab)
  kept <- hits[!is.na(hits)]
  # deparse.level = 0 keeps the result free of row names.
  rbind(kept - 1L, rep.int(1L, length(kept)), deparse.level = 0)
}
# Encode every document against `l1_terms`, using 3 worker processes.
documents <- mclapply(doc.list, get_terms, vocab = l1_terms, mc.cores = 3)

# Compute some statistics related to the data set:
D <- length(documents)  # number of documents
# Number of terms in the vocabulary the documents are indexed against.
# `vocab` is the vocabulary object (see vocab$vocab$terms above), so
# length(vocab) does not count terms; `l1_terms` is the vector passed to
# get_terms().
W <- length(l1_terms)
# Number of tokens per document (row 2 of each matrix holds the counts).
# vapply() guarantees an integer vector even when `documents` is empty.
doc.length <- vapply(documents, function(x) sum(x[2, ]), integer(1))
N <- sum(doc.length)  # total number of tokens in the data
# NOTE(review): term.table is built from the full vocabulary, so these
# frequencies are not aligned with `l1_terms` -- confirm before pairing them.
term.frequency <- as.integer(term.table)  # frequencies of terms in the corpus
# Run a simulation to pick a good number of topics -----------------

# Segment every post (no category filter here) and drop NA tokens.
doc.list <- ptt_dt[, post_text] %>%
  mclapply(cutter, worker = mix_seg, mc.cores = 3) %>%
  mclapply(function(x) x[!is.na(x)], mc.cores = 3)
dtm <- doc.list %>% seglist_to_dtm %>% filter_tfidf_dtm

# https://cran.r-project.org/web/packages/ldatuning/vignettes/topics.html
tic <- Sys.time()
result <- FindTopicsNumber(
  dtm,
  # Candidate topic counts: 10-60 in steps of 5, then 70-100 in steps of 10.
  # 60 used to be listed in both sequences; the duplicate is removed.
  # Disabled ranges kept for reference:
  #   seq(2, 6, by = 2), seq(120, 200, by = 20)
  topics = c(seq(10, 60, by = 5),
             seq(70, 100, by = 10)),
  metrics = c("Griffiths2004", "CaoJuan2009", "Arun2010"),
  method = "Gibbs",
  # Seeded from the clock, so results differ from run to run.
  control = list(seed = Sys.time() %>% as.numeric()),
  mc.cores = 3L,
  verbose = TRUE
)
Sys.time() - tic  # elapsed time
save(result, file = "models/ptt_kl_simmulation.RData")
FindTopicsNumber_plot(result)
# Topic Model ----------------------------------

# vocab_temp <- dtm_train_tfidf %>% filter_tfidf_dtm() %>% colnames()
# term.table <- vocab_tbl$vocab %>% 
#   data.table() %>% 
#   .[terms %in% vocab_temp && terms >= 2]
# term.frequency <- term.table[, terms_counts]
# vocab <- term.table[, terms]


# MCMC and model tuning parameters:
K <- 55  # n_topic
G <- 3000 # num.iterations
alpha <- 0.02
eta <- 0.02

# Fit the model:
set.seed(2016)
t1 <- Sys.time()
# `documents` holds zero-based indices into `l1_terms` (see get_terms()), so
# that same character vector must be supplied as the vocabulary; `vocab` is
# the vocabulary object, not the term vector the indices refer to.
lda_fit <- lda.collapsed.gibbs.sampler(
  documents = documents, K = K, vocab = l1_terms,
  num.iterations = G, alpha = alpha,
  eta = eta, initial = NULL, burnin = 0,
  compute.log.likelihood = TRUE)
t2 <- Sys.time()
t2 - t1  # about 2.899927 mins on server

# Save Result
save(lda_fit, file = "./models/ptt_keelung_lda_fit.RData")

根據指標選擇 55 個 topic cluster

Result

library(lda)

# Restore the fitted model object `lda_fit` from disk.
load("./models/ptt_keelung_lda_fit.RData")

# Top topic result
# Top 5 documents per topic, from the document/topic counts.
top_docs_num <- top.topic.documents(lda_fit$document_sums, 5)

# Six top words per topic (ranked by score), as a data.table.
top_words <- top.topic.words(
  lda_fit$topics, num.words = 6, by.score = TRUE
) %>%
  data.frame() %>%
  data.table()

DT::datatable(top_words)

word2vec

library(wordVectors)

# Prepare the tokenized text file: each post is collapsed into a single
# space-separated string. vapply() guarantees a character vector even when
# `post_text_seg` is empty.
ptt_keelung_split <- vapply(post_text_seg, paste, character(1), collapse = " ")
write_lines(ptt_keelung_split, "data/tokenized/ptt_keelung_split.txt")

# Fit models
tic <- Sys.time()
vector_set <- train_word2vec(
  train_file = "data/tokenized/ptt_keelung_split.txt",
  output_file = "models/ptt_keelung_word2vec.bin",
  force = TRUE,
  vectors = 100,
  # Leave one core free. detectCores() can return 1 or NA, so never ask for
  # fewer than one thread.
  threads = max(1L, parallel::detectCores() - 1L, na.rm = TRUE),
  window = 12
)
print(Sys.time() - tic)

相近關聯詞

  • 景點
# 20 entries of `vector_set` nearest to the vector for "景點" (attractions).
nearest_to(
  vector_set,
  vector_set[["景點"]],
  n = 20
)
  • 停車
# 20 entries of `vector_set` nearest to the vector for "停車" (parking).
nearest_to(
  vector_set,
  vector_set[["停車"]],
  n = 20
)
  • 交通
# 20 entries of `vector_set` nearest to the vector for "交通" (traffic).
nearest_to(
  vector_set,
  vector_set[["交通"]],
  n = 20
)

向量 (根據文字向量距離由小至大排列)

  • 遊客:夜市 = 本地人:?
# Analogy "遊客 : 夜市 = 本地人 : ?" (a : b = c : ?) is answered by the
# vector b - a + c. The earlier a - b + c pointed away from "夜市".
nearest_to(
  vector_set,
  vector_set[["夜市"]] - vector_set[["遊客"]] + vector_set[["本地人"]],
  n = 10
)
  • 基隆:市長 = 台北 : ?
# Analogy "基隆 : 市長 = 台北 : ?" (a : b = c : ?) is answered by the
# vector b - a + c. The earlier a - b + c pointed away from "市長".
nearest_to(
  vector_set,
  vector_set[["市長"]] - vector_set[["基隆"]] + vector_set[["台北"]],
  n = 10
)
  • 基隆:海洋廣場 = 台北 : ?
# Analogy "基隆 : 海洋廣場 = 台北 : ?" (a : b = c : ?) is answered by the
# vector b - a + c. The earlier a - b + c pointed away from "海洋廣場".
nearest_to(
  vector_set,
  vector_set[["海洋廣場"]] - vector_set[["基隆"]] + vector_set[["台北"]],
  n = 10
)

  • 基隆:交通 = 台北:?

# Analogy "基隆 : 交通 = 台北 : ?" (a : b = c : ?) is answered by the
# vector b - a + c. The earlier a - b + c pointed away from "交通".
nearest_to(
  vector_set,
  vector_set[["交通"]] - vector_set[["基隆"]] + vector_set[["台北"]],
  n = 10
)
  • 基隆:交通=新北市:?
# Analogy "基隆 : 交通 = 新北市 : ?" (a : b = c : ?) is answered by the
# vector b - a + c. The earlier a - b + c pointed away from "交通".
nearest_to(
  vector_set,
  vector_set[["交通"]] - vector_set[["基隆"]] + vector_set[["新北市"]],
  n = 10
)
  • 基隆:河 = 台北:?
# Analogy "基隆 : 河 = 台北 : ?" (a : b = c : ?) is answered by the
# vector b - a + c. The earlier a - b + c pointed away from "河".
nearest_to(
  vector_set,
  vector_set[["河"]] - vector_set[["基隆"]] + vector_set[["台北"]],
  n = 10
)

Reference

LS0tCnRpdGxlOiAiUFRUIEtlZWx1bmcgQW5hbHlzaXMiCmF1dGhvcjogIkxVIFlJIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDogCiAgaHRtbF9ub3RlYm9vazogCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUKICAgIGhpZ2hsaWdodDogcHlnbWVudHMKICAgIHRoZW1lOiBmbGF0bHkKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KAoJZWNobyA9IEZBTFNFLAoJbWVzc2FnZSA9IEZBTFNFLAoJd2FybmluZyA9IEZBTFNFLAoJY2FjaGUgPSBUUlVFLAoJY29sbGFwc2UgPSBUUlVFLAoJZmlnLmFsaWduID0gImNlbnRlciIsCglmaWcud2lkdGggPSA4LAoJY29tbWVudCA9ICIjPiIKKQpgYGAKCmBgYHtyLCBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KG1hZ3JpdHRyKQpsaWJyYXJ5KGRhdGEudGFibGUpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoZHRwbHlyKQpsaWJyYXJ5KHJlYWRyKQpsaWJyYXJ5KHN0cmluZ3IpCmxpYnJhcnkodG0pICMgaW5zdGFsbC5wYWNrYWdlcygic2xhbSIsIHR5cGUgPSAiYmluYXJ5IikKbGlicmFyeSh0ZXh0MnZlYykKbGlicmFyeShqaWViYVIpICMgd29yZCBzZWdtZW50YXRpb24KbGlicmFyeSh3b3JkY2xvdWQyKQojIGxpYnJhcnkodG9waWNtb2RlbHMpCiMgaHR0cDovL3N0YWNrb3ZlcmZsb3cuY29tL3F1ZXN0aW9ucy8yNDE3MjE4OC9ob3ctY2FuLWktaW5zdGFsbC10b3BpY21vZGVscy1wYWNrYWdlLWluLXIKbGlicmFyeShsZGF0dW5pbmcpICMgU2VsZWN0IG51bWJlciBvZiB0b3BpY3MgZm9yIExEQSBtb2RlbCAjIHN1ZG8gYXB0IGluc3RhbGwgbGlibXBmci1kZXYKbGlicmFyeSh3b3JkVmVjdG9ycykgIyBkZXZ0b29sczo6aW5zdGFsbF9naXRodWIoImJtc2NobWlkdC93b3JkVmVjdG9ycyIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShwbG90bHkpCmxpYnJhcnkoZmVhdGhlcikKbGlicmFyeShEVCkKbGlicmFyeShjb3JycikgIyBmb3IgY29yciBwbG90CmxpYnJhcnkoTWF0cml4KSAjIGZvciBTcGFyc2UgTWF0cml4CmxpYnJhcnkoc2xhbSkKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkodmlyaWRpcykKbGFwcGx5KGxpc3QuZmlsZXMoInV0aWxzIiwgcGF0dGVybiA9ICJcXC5bUnJdJCIsIGZ1bGwubmFtZXMgPSBUUlVFKSwgCiAgICAgICBGVU4gPSBmdW5jdGlvbih4KSB7c291cmNlKHgsIGVuY29kaW5nID0gIlVURi04Iik7IHJldHVybigpfSkKIyBkZXZ0b29sczo6aW5zdGFsbF9naXRodWIoInFpbndmL3JvcGVuY2MiKSAjIOe5geewoei9ieaPmwpgYGAKCiMjIERhdGEgU291cmNlCgpgYGB7ciwgaW5jbHVkZT1GQUxTRSwgY2FjaGU9VFJVRX0KcHR0X2R0IDwtIHJlYWRfZmVhdGhlcigiZGF0YS9wdHRfa2xfZGF0YS5mZWF0aGVyIikgJT4lIGRhdGEudGFibGUKCiMgIyBEYXRhIENsZWFuc2luZwpwdHRfZHRbLCBgOj1gKHBvc3RfdGltZSA9IHBvc3RfdGltZSAlPiUKICAgICAgICAgc3RycHRpbWUoIiVhICViICVkICVUICVZIiwgdHogPSAiUk9DIikg
JT4lIGFzLlBPU0lYY3QpXQpwdHRfZHRbLCBgOj1gKHBvc3RfdGltZSA9IGlmZWxzZShwb3N0X3RpbWU+PWFzLlBPU0lYY3QoIjE5OTAtMDEtMDEiLCJDU1QiKSAlPiUgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYXMubnVtZXJpYygpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBwb3N0X3RpbWUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIE5BKSldCnB0dF9kdFssIGA6PWAocG9zdF90aW1lID0gcG9zdF90aW1lICU+JSAKICAgICAgICAgICAgICAgICBhcy5QT1NJWGN0KHR6ID0gIkNTVCIsIG9yaWdpbiA9ICIxOTcwLTAxLTAxIikpXQpgYGAKCi0g5L6G5rqQ77yaUFRUIEtlZWxpbmcg5p2/Ci0g5pyf6ZaT77yaYHIgbWluKHB0dF9kdCRwb3N0X3RpbWUsIG5hLnJtPVQpYCAtIGByIG1heChwdHRfZHQkcG9zdF90aW1lLCBuYS5ybT1UKWAKLSDmlofmnKzmlbjph4/vvJpgciBucm93KHB0dF9kdClgIOevh+aWh+eroAoKIyMg5paH56ug5qiZ6aGM5YiG5p6QCgojIyMg5paH56ug56iu6aGeIFRvcCAyMAoKYGBge3J9CnB0dF9kdFssIGNhdGVnb3J5IDo9IAogICAgICAgICBzdHJpbmdyOjpzdHJfZXh0cmFjdCh0aXRsZSwgIl5cXFsoW15dXSs/KVxcXSIpXQpwdHRfZHRbIWlzLm5hKGNhdGVnb3J5KSwgLk4sIGJ5ID0gLihjYXRlZ29yeSldW29yZGVyKC1OKV0gJT4lIGhlYWQoMjApCmBgYAoKIyMjIFBUVCBLZWVsdW5nIOeZvOaWh+eorumhnuWIhuS9iAoKYGBge3J9CnRvcDIwIDwtIHB0dF9kdFshaXMubmEoY2F0ZWdvcnkpLCAuTiwgYnkgPSAuKGNhdGVnb3J5KV1bb3JkZXIoLU4pXSAlPiUgaGVhZCgyMCkKZCA8LSBwdHRfZHRbY2F0ZWdvcnkgJWluJSB0b3AyMCRjYXRlZ29yeSwgLihjYXRlZ29yeSwgcG9zdF90aW1lKV1bCiAgLCBjYXRlZ29yeSA6PSBmYWN0b3IoY2F0ZWdvcnksIGxldmVscyA9IHRvcDIwJGNhdGVnb3J5KV1bCiAgICAsIHBvc3RfZGF0ZSA6PSBhcy5EYXRlKHBvc3RfdGltZSldCmdncGxvdChkLCBhZXMoeCA9IHBvc3RfZGF0ZSwgY29sb3IgPSBjYXRlZ29yeSwgZmlsbCA9IGNhdGVnb3J5KSkgKwogIHN0YXRfZGVuc2l0eShnZW9tID0gImFyZWEiLCBhbHBoYSA9IDAuMikgKwogICMgc3RhdF9iaW4oYmlud2lkdGggPSAzMCwgZ2VvbSA9ICJhcmVhIiwgYWxwaGEgPSAwLjIpICsKICAjIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlID0gIlNldDIiKSArCiAgIyBzY2FsZV9maWxsX2JyZXdlcihwYWxldHRlID0gIlNldDIiKSArCiAgZ2d0aXRsZSgiUFRUIEtlZWx1bmcg55m85paH56iu6aGe5YiG5L2IIikgKwogIGd1aWRlcyhjb2xvciA9IGd1aWRlX2xlZ2VuZCh0aXRsZSA9IE5VTEwsIGxhYmVsLnBvc2l0aW9uID0gInRvcCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBucm93PTIsIGJ5cm93ID0gVFJVRSksCiAgICAgICAgIGZpbGwgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAofiBjYXRlZ29yeSwgbmNvbCA9IDUpICsKICB0aGVtZV9idygpICsKICB0aGVt
ZShsZWdlbmQucG9zaXRpb249Im5vbmUiLCB0ZXh0ID0gZWxlbWVudF90ZXh0KGZhbWlseSA9ICJTVEhlaXRpIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzaXplID0gMTQpKQpgYGAKCiMjIFRleHQgTWluaW5nCgpgYGB7ciB0bSBmdW5jdGlvbnMsIGluY2x1ZGU9RkFMU0V9CiMjIOi1t+aJi+W8j++8jOe1kOW3tOW7uueri+aWt+ipnuWZqAptaXhfc2VnIDwtIHdvcmtlcih0eXBlID0gIm1peCIsCiAgICAgICAgICAgICAgICAgIHVzZXIgPSAidXRpbHMvZGljdF91dGY4LnR4dCIsCiAgICAgICAgICAgICAgICAgIHN0b3Bfd29yZCA9ICJ1dGlscy9zdG9wX3V0ZjgudHh0IiwKICAgICAgICAgICAgICAgICAgc3ltYm9sID0gRkFMU0UsCiAgICAgICAgICAgICAgICAgIGVuY29kaW5nID0gIlVURi04IikKaG1tX3NlZyA8LSB3b3JrZXIodHlwZSA9ICJobW0iLAogICAgICAgICAgICAgICAgICB1c2VyID0gInV0aWxzL2RpY3RfdXRmOC50eHQiLAogICAgICAgICAgICAgICAgICBzdG9wX3dvcmQgPSAidXRpbHMvc3RvcF91dGY4LnR4dCIsCiAgICAgICAgICAgICAgICAgIHN5bWJvbCA9IEZBTFNFLAogICAgICAgICAgICAgICAgICBlbmNvZGluZyA9ICJVVEYtOCIpCiMgbWl4X3NlZyA8PSBwb3N0X3RleHRbMV0gIyB0cnkgZmlyc3QgcG9zdAoKIyBzZWxmLW1hZGUgZmlsdGVyIChidWlsdC1pbiBwZXJsJ3MgcmVndWxhciBleHByZXNzaW9uIGhhcyBidWcpCmN1dHRlciA8LSBmdW5jdGlvbiAodGV4dCwgd29ya2VyID0gbWl4X3NlZykgewogICMgdGV4dCA9ICLppqzoi7HkuZ3ljrvkuJbmlrDlpKflrbjmvJTorJsiCiAgaWYgKHRleHQgJWluJSBjKCIuIiwgIiIpKSB7CiAgICByZXR1cm4oTkFfY2hhcmFjdGVyXykKICB9CiAgCiAgZmlsdGVyX3dvcmRzID0gYygKICAgICLpo58o5ZOBKT/lroko5YWoKT8iLCLpo5/lk4EiLAogICAgIuiLseaWhyQiLCLlubRcXG4iLCLlqpJcXG4iLAogICAgIuaIkS4/Iiwi5LuWLj8iLCLkvaAuPyIsCiAgICAi5omA5LulIiwi5Y+v5LulIiwi5rKS5pyJIiwi5LiN6YGOIiwi5Zug54K6IiwKICAgICLpgoTmmK8iLCLoprrlvpciLCLlpKflrrYiLCLmr5TovIMiLCLmhJ/oproiLCLmmYLlgJkiLCLnj77lnKgiLCLmmYLplpMiLAogICAgIuWPr+iDvSIsIuadseilvyIsIueEtuW+jCIsIuiAjOS4lCIsIuiHquW3sSIsIuaciem7niIsCiAgICAi6YCZ6YKKIiwi6YKjLiIsIueZvOePviIsIumblueEtiIsIuS4jeimgSIsIumChOaYryIsCiAgICAi5LiA5qijIiwi55+l6YGTIiwi55yL5YiwIiwi55yf55qEIiwi5LuK5aSpIiwi5bCx5pivIiwi6YCZ5qijIiwi5aaC5p6cIiwKICAgICLkuI3mnIMiLCLku4DpurwiLCLlvozkvoYiLCLllY/poYwiLCLkuYvliY0iLCLlj6rmmK8iLCLmiJbmmK8iLCLnmoToqbEiLAogICAgIuWFtuS7liIsIumAmem6vCIsIuW3sue2kyIsIuW+iOWkmiIsIuWHuuS+hiIsIuaVtOWAiyIsIuS9huaYryIsIuWNuyIsCiAgICAi5YGP5YGPIiwi5aaC
5p6cIiwi5LiN6YGOIiwi5Zug5q2kIiwi5oiWIiwi5Y+IIiwi5LmfIiwi5YW25a+mIiwKICAgICLluIzmnJsiLCLntZDmnpwiLCLmgI7purwiLCLnlbbnhLYiLCLmnInkupsiLCLku6XkuIoiLCLlj6blpJYiLCLmraTlpJYiLAogICAgIuS7peWkliIsIuijoemdoiIsIumDqOWIhiIsIuebtOaOpSIsIuWJm+WlvSIsIueUseaWvCIsCiAgICAi5Y6f5pysIiwi5qiZ6aGMIiwi5pmC6ZaTIiwi5pel5pyfIiwi5L2c6ICFIiwi6YCZ56iuIiwi6KGo56S6Iiwi55yL6KaLIiwKICAgICLkvLzkuY4iLCLkuIDljYoiLCLkuIDloIYiLCLlj43mraMiLCLluLjluLgiLCLlub7lgIsiLCLnm67liY0iLCLkuIrmrKEiLAogICAgIuWFrOWRiiIsIuWPquWlvSIsIuWTquijoSIsIuS4gC4iLCLmgI7purwiLCLlpb3lg48iLCLntZDmnpwiLAogICAgIuiAjOW3siIsICLlsYXnhLYiLCAi6Kyd6KydIiwi6KuL5ZWPIiwi5aSn5aSnIiwi5bCP5byfIiwgIuaWh+eroOS7o+eivCIsCiAgICAicG8iLCJ4ZCIsIuaHieipsiIsIuacgOW+jCIsIuacieaykuaciSIsInNlbnQiLCJmcm9tIiwibXkiLAogICAgIkFuZHJvaWQiLCAiSlBUVCIsICLlpoLmj5AiLCLlpoLpoYwiLCLnt6jovK8iLCLlvJXov7AiLCLpipjoqIAiLCLnq5nlhafkv6EiLAogICAgIuiomOiAhSIsCiAgICAi5Lit5b+DIiwi5LmLLiIsIuaMh+WHuiIsIuaci+WPiyIsCiAgICAi5LqGIiwi5LmfIiwi55qEIiwi5ZyoIiwi6IiHIiwi5Y+KIiwi562JIiwi5pivIiwidGhlIiwiYW5kIiwKICAgICLmnIgiLCAi5bm0IiwgIuaXpSIsICLmmYIiLCAiTkEiLAogICAgImNvbSIsCiAgICAiXFxzIiwKICAgICJbYS16QS1aXSIsCiAgICAiWzAtOV0iCiAgKQogIHBhdHRlcm4gPC0gc3ByaW50ZigiXiVzIiwgcGFzdGUoZmlsdGVyX3dvcmRzLCBjb2xsYXBzZSA9ICJ8XiIpKQogIHRyeUNhdGNoKHsKICAgIHRleHRfc2VnIDwtIHdvcmtlciA8PSB0ZXh0CiAgfSwgZXJyb3IgPSBmdW5jdGlvbihlKSB7CiAgICBzdG9wKCciJywgdGV4dCwgJyIgPj4gJywgZSkKICB9KQogIGZpbHRlcl9zZWcgPC0gdGV4dF9zZWdbIXN0cmluZ3I6OnN0cl9kZXRlY3QodGV4dF9zZWcsIHBhdHRlcm4pXQogIGZpbHRlcl9zZWcKfQpgYGAKCi0g6Zec6Y216KmeIFRvcCAxMDAKCjEuIOaOkumZpCBgciBjKCJb5Lqk5piTXSIsICJb5b615rGCXSIsICJb6LSI6YCBXSIsICJb5YWs5ZGKXSIsICJb5Y2U5bCLXSIsICJb6Ieq5LuLXSIpYCDpoZ7liKXmlofnq6AKMi4g5Yip55SoIHRmLWlkZiDpl5zpjbXoqZ7nrpfms5XvvIzomZXnkIbpq5jpoLvoqZ7pq5jkvLDlj4rkvY7poLvoqZ7kvY7kvLDnmoTllY/poYzvvIzlj5blvpfmlbTlgIvmlofmqpTnmoTpl5zpjbXoqZ4KCmBgYHtyLCBpbmNsdWRlPUZBTFNFfQojIHNlZ21lbnQKbGlicmFyeShwYXJhbGxlbCkKcG9zdF90ZXh0X3NlZyA8LSBwdHRfZHRbCiAgIWNhdGVnb3J5ICVpbiUgYygiW+S6pOaYk10iLCAiW+W+teaxgl0iLCAiW+i0iOmAgV0iLCAiW+WFrOWRil0iLCAiW+WNlOWwi10iLCAiW+iHquS7
i10iKQogICwgcG9zdF90ZXh0XSAlPiUgCiAgbWNsYXBwbHkoY3V0dGVyLCB3b3JrZXIgPSBtaXhfc2VnLCBtYy5jb3JlcyA9IDMpICU+JSAKICBtY2xhcHBseShmdW5jdGlvbih4KSB4WyFpcy5uYSh4KV0sIG1jLmNvcmVzID0gMykKIyBhZGp1c3QgdG8gdGhlIGZvcm1hdCBmb3IgdGV4dDJ2ZWM6Oml0b2tlbgpwb3N0X3RleHRfdG9rZW4gPC0gaXRva2VuKHBvc3RfdGV4dF9zZWcpCmBgYAoKYGBge3IsIGluY2x1ZGU9RkFMU0V9CiMgdW5pcXVlIHdvcmQgbWF0cml4CnZvY2FiIDwtIGNyZWF0ZV92b2NhYnVsYXJ5KHBvc3RfdGV4dF90b2tlbiwgbmdyYW09YygxTCwgMkwpKQojIGR0bQp2ZWN0b3JpemVyIDwtIHZvY2FiX3ZlY3Rvcml6ZXIodm9jYWIpCmR0bSA8LSBjcmVhdGVfZHRtKHBvc3RfdGV4dF90b2tlbiwgdmVjdG9yaXplcikKIyByZW1vdmUgMS13b3JkIHRlcm0KZHRtIDwtIGR0bVssIGR0bSAlPiUgY29sbmFtZXMoKSAlPiUgbmNoYXIgPj0gMl0KCiMgZHRtICU+JSBmaW5kX2ZyZXFfdGVybXMoMzApICMgbm90IGdvb2QKCiMjIHRmLWlkZgojIGRlZmluZSB0ZmlkZiBtb2RlbAp0ZmlkZiA9IFRmSWRmJG5ldygpCiMgZml0IG1vZGVsIHRvIHRyYWluIGRhdGEgYW5kIHRyYW5zZm9ybSB0cmFpbiBkYXRhIHdpdGggZml0dGVkIG1vZGVsCmR0bV90cmFpbl90ZmlkZiA9IGZpdF90cmFuc2Zvcm0oZHRtLCB0ZmlkZikKIyB0ZmlkZiBtb2RpZmllZCBieSBmaXRfdHJhbnNmb3JtKCkgY2FsbCEKYGBgCgpgYGB7cn0Ka2V5X3Rlcm0gPC0gZHRtX3RyYWluX3RmaWRmICU+JSBmaW5kX2ZyZXFfdGVybXMoMykgJT4lIAogIGNvbFN1bXMoKSAlPiUgCiAgZGF0YS5mcmFtZSgpICU+JSAKICBkYXRhLnRhYmxlKGtlZXAucm93bmFtZXMgPSBUUlVFKSAlPiUgCiAgc2V0bmFtZXMoYygia2V5d29yZCIsICJzdW1fdGZfaWRmIikpICU+JSAKICAuW29yZGVyKC1zdW1fdGZfaWRmKV0Ka2V5X3Rlcm0gJT4lIGhlYWQoMTAwKSAlPiUgRFQ6OmRhdGF0YWJsZSgpCmBgYAoKLSBXb3JkY2xvdWQKCmBgYHtyfQpkIDwtIGtleV90ZXJtICU+JSBoZWFkKDIwMCkKbmNvbG9yIDwtIG5yb3coZCkKZ2V0UGFsZXR0ZSA9IGNvbG9yUmFtcFBhbGV0dGUoUkNvbG9yQnJld2VyOjpicmV3ZXIucGFsKDgsICJTZXQyIikpCndvcmRjbG91ZDIoZCwgCiAgICAgICAgICAgc2l6ZSA9IDAuNSwKICAgICAgICAgICBmb250RmFtaWx5ID0gIk5vdG8gU2FucyBDSksgVEMiLCAKICAgICAgICAgICBmb250V2VpZ2h0ID0gIm5vcm1hbCIsCiAgICAgICAgICAgcm90YXRlUmF0aW8gPSAwLAogICAgICAgICAgIGNvbG9yID0gZ2V0UGFsZXR0ZShuY29sb3IpLAogICAgICAgICAgIHNoYXBlID0gImNpcmNsZSIpCmBgYAoKIyMjIFRvcGljIE1vZGVscwoKYGBge3IsIGV2YWw9RkFMU0V9CiMgUHJlcHJvY2Vzc2luZyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KCmRvYy5saXN0IDwtIHBvc3RfdGV4dF9zZWcKCiMjIHRmLWlkZgojIGRlZmluZSB0Zmlk
ZiBtb2RlbAp0ZmlkZiA9IFRmSWRmJG5ldygpCiMgZml0IG1vZGVsIHRvIHRyYWluIGRhdGEgYW5kIHRyYW5zZm9ybSB0cmFpbiBkYXRhIHdpdGggZml0dGVkIG1vZGVsCmR0bV90cmFpbl90ZmlkZiA9IGZpdF90cmFuc2Zvcm0oZHRtLCB0ZmlkZikKIyB0ZmlkZiBtb2RpZmllZCBieSBmaXRfdHJhbnNmb3JtKCkgY2FsbCEKCmwxIDwtIGR0bV90cmFpbl90ZmlkZiAlPiUgZmluZF9mcmVxX3Rlcm1zKGxvd2ZyZXEgPSA1KSAlPiUgCiAgY29sU3VtcygpICU+JSBtZWRpYW4oKQpsMV90ZXJtcyA8LSAoZHRtX3RyYWluX3RmaWRmICU+JSBmaW5kX2ZyZXFfdGVybXMobG93ZnJlcSA9IDUpICU+JSAKICBjb2xTdW1zKCkgPiBsMSkgJT4lIG5hbWVzCgojIGNvbXB1dGUgdGhlIHRhYmxlIG9mIHRlcm1zOgojIHRlcm0udGFibGUgPC0gZHRtICU+JSBzbGFtOjpjb2xfc3VtcygpCiMgdGVybS50YWJsZSA8LSBzb3J0KHRlcm0udGFibGUsIGRlY3JlYXNpbmcgPSBUUlVFKQp0ZXJtLnRhYmxlIDwtIHNldE5hbWVzKHZvY2FiJHZvY2FiJHRlcm1zX2NvdW50cywgdm9jYWIkdm9jYWIkdGVybXMpCgojIHJlbW92ZSB0ZXJtcyB0aGF0IGFyZSBzdG9wIHdvcmRzIG9yIG9jY3VyIGZld2VyIHRoYW4gNSB0aW1lczoKIyBkZWwgPC0gdGVybS50YWJsZSA8IDUKIyB0ZXJtLnRhYmxlIDwtIHRlcm0udGFibGVbIWRlbF0KIyB2b2NhYiA8LSBuYW1lcyh0ZXJtLnRhYmxlKQoKZ2V0X3Rlcm1zIDwtIGZ1bmN0aW9uKGRvYy5saXN0LCB2b2NhYikgewogIGluZGV4IDwtIG1hdGNoKGRvYy5saXN0LCB2b2NhYikKICBpbmRleCA8LSBpbmRleFshaXMubmEoaW5kZXgpXQogIHJiaW5kKGFzLmludGVnZXIoaW5kZXggLSAxKSwgYXMuaW50ZWdlcihyZXAoMSwgbGVuZ3RoKGluZGV4KSkpKQp9CmRvY3VtZW50cyA8LSBtY2xhcHBseShkb2MubGlzdCwgZ2V0X3Rlcm1zLCB2b2NhYj1sMV90ZXJtcywgbWMuY29yZXMgPSAzKQoKIyBDb21wdXRlIHNvbWUgc3RhdGlzdGljcyByZWxhdGVkIHRvIHRoZSBkYXRhIHNldDoKRCA8LSBsZW5ndGgoZG9jdW1lbnRzKSAgIyBudW1iZXIgb2YgZG9jdW1lbnRzICgyLDAwMCkKVyA8LSBsZW5ndGgodm9jYWIpICAjIG51bWJlciBvZiB0ZXJtcyBpbiB0aGUgdm9jYWIgKDE0LDU2OCkKZG9jLmxlbmd0aCA8LSBzYXBwbHkoZG9jdW1lbnRzLCBmdW5jdGlvbih4KSBzdW0oeFsyLCBdKSkgICMgbnVtYmVyIG9mIHRva2VucyBwZXIgZG9jdW1lbnQgWzMxMiwgMjg4LCAxNzAsIDQzNiwgMjkxLCAuLi5dCk4gPC0gc3VtKGRvYy5sZW5ndGgpICAjIHRvdGFsIG51bWJlciBvZiB0b2tlbnMgaW4gdGhlIGRhdGEgKDU0Niw4MjcpCnRlcm0uZnJlcXVlbmN5IDwtIGFzLmludGVnZXIodGVybS50YWJsZSkgICMgZnJlcXVlbmNpZXMgb2YgdGVybXMgaW4gdGhlIGNvcnB1cwpgYGAKCmBgYHtyLCBldmFsPUZBTFNFfQojIOi3keWAi+aooeaTrO+8jOaMkeS4gOWAi+WlveeahOS4u+mhjOaVuCAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQoKZG9jLmxp
c3QgPC0gcHR0X2R0WywgcG9zdF90ZXh0XSAlPiUgCiAgbWNsYXBwbHkoY3V0dGVyLCB3b3JrZXIgPSBtaXhfc2VnLCBtYy5jb3JlcyA9IDMpICU+JSAKICBtY2xhcHBseShmdW5jdGlvbih4KSB4WyFpcy5uYSh4KV0sIG1jLmNvcmVzID0gMykgCmR0bSA8LSBkb2MubGlzdCAlPiUgc2VnbGlzdF90b19kdG0gJT4lIGZpbHRlcl90ZmlkZl9kdG0KCiMgaHR0cHM6Ly9jcmFuLnItcHJvamVjdC5vcmcvd2ViL3BhY2thZ2VzL2xkYXR1bmluZy92aWduZXR0ZXMvdG9waWNzLmh0bWwKdGljIDwtIFN5cy50aW1lKCkKcmVzdWx0IDwtIEZpbmRUb3BpY3NOdW1iZXIoCiAgZHRtLAogIHRvcGljcyA9IGMoI3NlcSgyLCA2LCBieSA9IDIpLAogICAgICAgICAgICAgc2VxKDEwLCA2MCwgYnkgPSA1KSwKICAgICAgICAgICAgIHNlcSg2MCwgMTAwLCBieSA9IDEwKSMsCiAgICAgICAgICAgICAjIHNlcSgxMjAsIDIwMCwgYnkgPSAyMCkKICAgICAgICAgICAgICksCiAgbWV0cmljcyA9IGMoIkdyaWZmaXRoczIwMDQiLCAiQ2FvSnVhbjIwMDkiLCAiQXJ1bjIwMTAiKSwKICBtZXRob2QgPSAiR2liYnMiLAogIGNvbnRyb2wgPSBsaXN0KHNlZWQgPSBTeXMudGltZSgpICU+JSBhcy5udW1lcmljKCkpLAogIG1jLmNvcmVzID0gM0wsCiAgdmVyYm9zZSA9IFRSVUUKKQpTeXMudGltZSgpIC0gdGljCnNhdmUocmVzdWx0LCBmaWxlID0gIm1vZGVscy9wdHRfa2xfc2ltbXVsYXRpb24uUkRhdGEiKQpGaW5kVG9waWNzTnVtYmVyX3Bsb3QocmVzdWx0KQpgYGAKCiFbXShpbWcvbGRhX3NpbV9yZXN1bHRfcHR0LnBuZykKCmBgYHtyLCBldmFsPUZBTFNFfQojIFRvcGljIE1vZGVsIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KCiMgdm9jYWJfdGVtcCA8LSBkdG1fdHJhaW5fdGZpZGYgJT4lIGZpbHRlcl90ZmlkZl9kdG0oKSAlPiUgY29sbmFtZXMoKQojIHRlcm0udGFibGUgPC0gdm9jYWJfdGJsJHZvY2FiICU+JSAKIyAgIGRhdGEudGFibGUoKSAlPiUgCiMgICAuW3Rlcm1zICVpbiUgdm9jYWJfdGVtcCAmJiB0ZXJtcyA+PSAyXQojIHRlcm0uZnJlcXVlbmN5IDwtIHRlcm0udGFibGVbLCB0ZXJtc19jb3VudHNdCiMgdm9jYWIgPC0gdGVybS50YWJsZVssIHRlcm1zXQoKCiMgTUNNQyBhbmQgbW9kZWwgdHVuaW5nIHBhcmFtZXRlcnM6CksgPC0gNTUgICMgbl90b3BpYwpHIDwtIDMwMDAgIyBudW0uaXRlcmF0aW9ucwphbHBoYSA8LSAwLjAyCmV0YSA8LSAwLjAyCgojIEZpdCB0aGUgbW9kZWw6CnNldC5zZWVkKDIwMTYpCnQxIDwtIFN5cy50aW1lKCkKbGRhX2ZpdCA8LSBsZGEuY29sbGFwc2VkLmdpYmJzLnNhbXBsZXIoCiAgZG9jdW1lbnRzID0gZG9jdW1lbnRzLCBLID0gSywgdm9jYWIgPSB2b2NhYiwgCiAgbnVtLml0ZXJhdGlvbnMgPSBHLCBhbHBoYSA9IGFscGhhLCAKICBldGEgPSBldGEsIGluaXRpYWwgPSBOVUxMLCBidXJuaW4gPSAwLAogIGNvbXB1dGUubG9nLmxpa2VsaWhvb2QgPSBUUlVFKQp0MiA8LSBTeXMudGltZSgpCnQyIC0gdDEgICMgYWJv
dXQgMi44OTk5MjcgbWlucyBvbiBzZXJ2ZXIKCiMgU2F2ZSBSZXN1bHQKc2F2ZShsZGFfZml0LCBmaWxlID0gIi4vbW9kZWxzL3B0dF9rZWVsdW5nX2xkYV9maXQuUkRhdGEiKQpgYGAKCuagueaTmuaMh+aomemBuOaThyA1NSDlgIsgdG9waWMgY2x1c3RlcgoKKipSZXN1bHQqKgoKYGBge3IsIGNhY2hlPUZBTFNFfQpsaWJyYXJ5KGxkYSkKbG9hZCgiLi9tb2RlbHMvcHR0X2tlZWx1bmdfbGRhX2ZpdC5SRGF0YSIpCiMgVG9wIHRvcGljIHJlc3VsdAp0b3BfZG9jc19udW0gPC0gbGRhX2ZpdCRkb2N1bWVudF9zdW1zICU+JSB0b3AudG9waWMuZG9jdW1lbnRzKDUpCnRvcF93b3JkcyA8LSBsZGFfZml0JHRvcGljcyAlPiUgdG9wLnRvcGljLndvcmRzKG51bS53b3JkcyA9IDYsIGJ5LnNjb3JlID0gVFJVRSkgJT4lIAogIGRhdGEuZnJhbWUoKSAlPiUgZGF0YS50YWJsZSgpCgp0b3Bfd29yZHMgJT4lIERUOjpkYXRhdGFibGUoKQpgYGAKCiMjIHdvcmQydmVjCgpgYGB7ciwgZXZhbD1GQUxTRX0KbGlicmFyeSh3b3JkVmVjdG9ycykKCiMgUHJlcGFyZSB0b2tlbml6ZXMgdGV4dCBmaWxlCnB0dF9rZWVsdW5nX3NwbGl0IDwtIHBvc3RfdGV4dF9zZWcgJT4lCiAgc2FwcGx5KHBhc3RlLCBjb2xsYXBzZSA9ICIgIikKcHR0X2tlZWx1bmdfc3BsaXQgJT4lIHdyaXRlX2xpbmVzKCJkYXRhL3Rva2VuaXplZC9wdHRfa2VlbHVuZ19zcGxpdC50eHQiKQoKIyBGaXQgbW9kZWxzCnRpYyA8LSBTeXMudGltZSgpCnZlY3Rvcl9zZXQgPC0gdHJhaW5fd29yZDJ2ZWModHJhaW5fZmlsZSA9ICJkYXRhL3Rva2VuaXplZC9wdHRfa2VlbHVuZ19zcGxpdC50eHQiLAogICAgICAgICAgICAgICAgICAgICAgICAgIG91dHB1dF9maWxlID0gIm1vZGVscy9wdHRfa2VlbHVuZ193b3JkMnZlYy5iaW4iLAogICAgICAgICAgICAgICAgICAgICAgICAgIGZvcmNlID0gVFJVRSwKICAgICAgICAgICAgICAgICAgICAgICAgICB2ZWN0b3JzID0gMTAwLAogICAgICAgICAgICAgICAgICAgICAgICAgIHRocmVhZHMgPSBwYXJhbGxlbDo6ZGV0ZWN0Q29yZXMoKS0xLAogICAgICAgICAgICAgICAgICAgICAgICAgIHdpbmRvdyA9IDEyKQpwcmludChTeXMudGltZSgpIC0gdGljKQpgYGAKYGBge3IsIGVjaG89RkFMU0V9CmxpYnJhcnkod29yZFZlY3RvcnMpCnZlY3Rvcl9zZXQgPC0gcmVhZC52ZWN0b3JzKCJtb2RlbHMvcHR0X2tlZWx1bmdfd29yZDJ2ZWMuYmluIikKYGBgCgojIyMg55u46L+R6Zec6IGv6KmeCgotIOaZr+m7ngoKYGBge3J9Cm5lYXJlc3RfdG8odmVjdG9yX3NldCwgdmVjdG9yX3NldFtbIuaZr+m7niJdXSwgbiA9IDIwKQpgYGAKCi0g5YGc6LuKCgpgYGB7cn0KbmVhcmVzdF90byh2ZWN0b3Jfc2V0LCB2ZWN0b3Jfc2V0W1si5YGc6LuKIl1dLCBuID0gMjApCmBgYAoKLSDkuqTpgJoKCmBgYHtyfQpuZWFyZXN0X3RvKHZlY3Rvcl9zZXQsIHZlY3Rvcl9zZXRbWyLkuqTpgJoiXV0sIG4gPSAyMCkKYGBgCgojIyMg5ZCR6YePICjmoLnmk5rmloflrZflkJHph4/ot53pm6LnlLHl
sI/oh7PlpKfmjpLliJcpCgotIOmBiuWuou+8muWknOW4gj0g5pys5Zyw5Lq677ya77yfCgpgYGB7cn0KbmVhcmVzdF90byh2ZWN0b3Jfc2V0LAp2ZWN0b3Jfc2V0W1si6YGK5a6iIl1dIC0gdmVjdG9yX3NldFtbIuWknOW4giJdXSArIHZlY3Rvcl9zZXRbWyLmnKzlnLDkuroiXV0sCm4gPSAxMCkKYGBgCgoKLSDln7rpmobvvJrluILplbcgPSDlj7DljJcgOiA/CgpgYGB7cn0KbmVhcmVzdF90byh2ZWN0b3Jfc2V0LAp2ZWN0b3Jfc2V0W1si5Z+66ZqGIl1dIC0gdmVjdG9yX3NldFtbIuW4gumVtyJdXSArIHZlY3Rvcl9zZXRbWyLlj7DljJciXV0sCm4gPSAxMCkKYGBgCgotIOWfuumahu+8mua1t+a0i+W7o+WgtCA9IOWPsOWMlyA6ID8KCmBgYHtyfQpuZWFyZXN0X3RvKHZlY3Rvcl9zZXQsCnZlY3Rvcl9zZXRbWyLln7rpmoYiXV0gLSB2ZWN0b3Jfc2V0W1si5rW35rSL5buj5aC0Il1dICsgdmVjdG9yX3NldFtbIuWPsOWMlyJdXSwKbiA9IDEwKQpgYGAKCu+8jSDln7rpmobvvJrkuqTpgJo95Y+w5YyXOj8KCmBgYHtyfQpuZWFyZXN0X3RvKHZlY3Rvcl9zZXQsCnZlY3Rvcl9zZXRbWyLln7rpmoYiXV0gLSB2ZWN0b3Jfc2V0W1si5Lqk6YCaIl1dICsgdmVjdG9yX3NldFtbIuWPsOWMlyJdXSwKbiA9IDEwKQpgYGAKCi0g5Z+66ZqG77ya5Lqk6YCaPeaWsOWMl+W4gjo/CgpgYGB7cn0KbmVhcmVzdF90byh2ZWN0b3Jfc2V0LAp2ZWN0b3Jfc2V0W1si5Z+66ZqGIl1dIC0gdmVjdG9yX3NldFtbIuS6pOmAmiJdXSArIHZlY3Rvcl9zZXRbWyLmlrDljJfluIIiXV0sCm4gPSAxMCkKYGBgCgotIOWfuumahu+8muaysyA9IOWPsOWMlzrvvJ8KCmBgYHtyfQpuZWFyZXN0X3RvKHZlY3Rvcl9zZXQsCnZlY3Rvcl9zZXRbWyLln7rpmoYiXV0gLSB2ZWN0b3Jfc2V0W1si5rKzIl1dICsgdmVjdG9yX3NldFtbIuWPsOWMlyJdXSwKbiA9IDEwKQpgYGAKCiMjIFJlZmVyZW5jZQoKLSBbd29yZDJ2ZWM6IERpc3RyaWJ1dGVkIFJlcHJlc2VudGF0aW9ucyBvZiBXb3JkcyBhbmQgUGhyYXNlcwphbmQgdGhlaXIgQ29tcG9zaXRpb25hbGl0eV0oaHR0cHM6Ly9wYXBlcnMubmlwcy5jYy9wYXBlci81MDIxLWRpc3RyaWJ1dGVkLXJlcHJlc2VudGF0aW9ucy1vZi13b3Jkcy1hbmQtcGhyYXNlcy1hbmQtdGhlaXItY29tcG9zaXRpb25hbGl0eS5wZGYpCi0gW1IgcGFja2FnZTogd29yZFZlY3RvcnNdKGh0dHBzOi8vZ2l0aHViLmNvbS9ibXNjaG1pZHQvd29yZFZlY3RvcnMpCg==